{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "29ddd15d-a3c5-4f4e-a678-873f56162724",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from IPython.display import Markdown, display\n",
    "import ollama"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "479ff514-e8bd-4985-a572-2ea28bb4fa40",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest â ‹ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest â ™ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest â ¹ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest â ¸ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest â ¼ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest â ´ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest â ¦ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest â § \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest â ‡ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest â � \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest \u001b[K\n",
      "pulling 2bada8a74506... 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–� 4.7 GB                         \u001b[K\n",
      "pulling 66b9ea09bd5b... 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–�   68 B                         \u001b[K\n",
      "pulling eb4402837c78... 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–� 1.5 KB                         \u001b[K\n",
      "pulling 832dd9e00a68... 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–�  11 KB                         \u001b[K\n",
      "pulling 2f15b3218f05... 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–�  487 B                         \u001b[K\n",
      "verifying sha256 digest \u001b[K\n",
      "writing manifest \u001b[K\n",
      "success \u001b[K\u001b[?25h\u001b[?2026l\n"
     ]
    }
   ],
   "source": [
    "# Let's just make sure the model is loaded\n",
    "\n",
    "!ollama pull qwen2.5\n",
    "MODEL = \"qwen2.5\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "6de38216-6d1c-48c4-877b-86d403f4e0f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "headers = {\n",
    " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
    "}\n",
    "\n",
    "class Website:\n",
    "\n",
    "    def __init__(self, url):\n",
    "        \"\"\"\n",
    "        Create this Website object from the given url using the BeautifulSoup library\n",
    "        \"\"\"\n",
    "        self.url = url\n",
    "        response = requests.get(url, headers=headers)\n",
    "        soup = BeautifulSoup(response.content, 'html.parser')\n",
    "        self.title = soup.title.string if soup.title else \"No title found\"\n",
    "        for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
    "            irrelevant.decompose()\n",
    "        self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "a531b8f6-d4f8-4140-b54d-bcf280bd7a99",
   "metadata": {},
   "outputs": [],
   "source": [
    "system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
    "and provides a short summary, ignoring text that might be navigation related. \\\n",
    "Respond in markdown.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "6b46ff43-4817-431e-8335-8d2cc9957910",
   "metadata": {},
   "outputs": [],
   "source": [
    "def user_prompt_for(website):\n",
    "    user_prompt = f\"You are looking at a website titled {website.title}\"\n",
    "    user_prompt += \"\\nThe contents of this website is as follows; \\\n",
    "please provide a summary of this website in markdown. \\\n",
    "If it includes news or announcements, then summarize these too.(only if they are present)\\n\\n\"\n",
    "    user_prompt += website.text\n",
    "    return user_prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "13a3a001-5d91-4269-ab60-493bbf35bda4",
   "metadata": {},
   "outputs": [],
   "source": [
    "def messages_for(website):\n",
    "    return [\n",
    "        {\"role\": \"system\", \"content\": system_prompt},\n",
    "        {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
    "    ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "c61ad738-9395-415d-b88b-d4a70d4331aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "def summarize(url):\n",
    "    website = Website(url)\n",
    "    response = ollama.chat(model=MODEL, messages=messages_for(website))\n",
    "    return response['message']['content']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "bdbcfa75-980b-4542-872d-af8b20546b5d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'```markdown\\n# Tailwind CSS Cheat Sheet Summary\\n\\nThis website serves as a comprehensive guide for developers using Tailwind CSS, providing quick access to commonly used utility classes and configurations. The content is organized into sections such as typography, layout, colors, shadows, and more, making it easy for users to find specific styles or settings.\\n\\n- **Typography**: Includes various font sizes, weights, line heights, and other typographic utilities.\\n- **Layout**: Features columns, grid, flexbox, spacing, and responsive design utilities.\\n- **Colors**: Lists predefined color palettes and utility classes for color manipulation.\\n- **Shadows**: Provides options to add depth and dimension to elements through shadow effects.\\n- **Other Sections**: Covers forms, animations, and more, with concise descriptions and examples.\\n\\nThe site is designed to be a one-stop reference tool, allowing developers to quickly apply Tailwind CSS styles without having to consult the official documentation every time.\\n```'"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "summarize(\"https://www.creative-tim.com/twcomponents/cheatsheet/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "817e6f73-1abe-4f79-9010-f4264e0f324a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def display_summary(url):\n",
    "    summary = summarize(url)\n",
    "    display(Markdown(summary))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "504c19cf-9add-4a78-a028-fe2710e0604d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "# Summary\n",
       "\n",
       "**Home Page:**\n",
       "- The website is titled \"Home - Edward Donner\" and introduces Ed, who enjoys coding, experimenting with large language models (LLMs), DJing, and engaging in Hacker News.\n",
       "- He co-founded Nebula.io, an AI company focusing on helping people discover their potential. The platform uses proprietary LLMs for talent discovery and has been patented.\n",
       "\n",
       "**News/Announcements:**\n",
       "- **January 23, 2025:** LLM Workshop – Hands-on with Agents\n",
       "- **December 21, 2024:** Welcome, SuperDataScientists!\n",
       "- **November 13, 2024:** Mastering AI and LLM Engineering – Resources\n",
       "- **October 16, 2024:** From Software Engineer to AI Data Scientist – resources\n",
       "\n",
       "**Connect Section:**\n",
       "- Provides ways to get in touch with Ed, including email, LinkedIn, Twitter, Facebook, and a newsletter subscription form.\n",
       "\n",
       "**Additional Content:**\n",
       "- **Connect Four:** Describes it as an arena where LLMs compete against each other.\n",
       "- **About Page:** Further details about Ed's background and Nebula.io."
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display_summary('https://edwarddonner.com')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20d621cb-6bfb-41a6-bd98-a51ef0a8b158",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
